# Required packages
import pandas as pd
import numpy as np
import catboost
# Sklearn
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV, cross_val_score, KFold, StratifiedShuffleSplit
from sklearn import metrics
# Visualisation libraries
## progressbar
import progressbar
## Text
from colorama import Fore, Back, Style
from IPython.display import Image, display, Markdown, Latex, clear_output
## plotly
from plotly.offline import init_notebook_mode, iplot
import plotly.graph_objs as go
import plotly.offline as py
from plotly.subplots import make_subplots
import plotly.express as px
## seaborn
import seaborn as sns
sns.set_style("whitegrid")
sns.set_context("paper", rc={"font.size":12,"axes.titlesize":14,"axes.labelsize":12})
## matplotlib
import matplotlib.pyplot as plt
from matplotlib.font_manager import FontProperties
import matplotlib.colors as mcolors
from matplotlib.patches import Ellipse, Polygon
import matplotlib.gridspec as gridspec
from pylab import rcParams
plt.style.use('seaborn-whitegrid')
import matplotlib as mpl
mpl.rcParams['figure.figsize'] = (17, 6)
mpl.rcParams['axes.labelsize'] = 14
mpl.rcParams['xtick.labelsize'] = 12
mpl.rcParams['ytick.labelsize'] = 12
mpl.rcParams['text.color'] = 'k'
%matplotlib inline
import warnings
warnings.filterwarnings("ignore")
Sign languages are languages that convey a speaker's ideas by simultaneously employing hand gestures, movement, the orientation of the fingers, arms or body, and facial expressions [1]. In this article, we develop a stacking classifier model from various scikit-learn classifiers for the Sign Language Digits dataset from Kaggle. The main purpose of this article is to demonstrate the implementation of stacking classifiers.
# Load the Sign Language Digits dataset (images X and one-hot labels y).
data_dir = 'Sign_Language_Digits'
X = np.load(data_dir + '/X.npy')
y = np.load(data_dir + '/Y.npy')
Investigating the data, we can see that y represents the digits 0 to 9, one-hot encoded as dummy variables.
# Decode the one-hot labels into integer digit values: for each row of y,
# the column index of the non-zero entry is the digit.
_, Values = np.nonzero(y[:, :])
Values = pd.Series(Values)
# First occurrence of each digit, used to pick one example image per class.
# (The original computed this list twice; the duplicate was removed.)
Ind = [Values[Values == i].index.min() for i in Values.unique()]
fig, ax = plt.subplots(nrows=2, ncols=5, figsize=(17, 6), subplot_kw={'xticks': [], 'yticks': []})
ax = ax.ravel()
font = FontProperties()
font.set_family('fantasy')
for i in range(len(ax)):
    _ = ax[i].imshow(X[Ind[i], :, :], cmap='gray')
    # BUG FIX: np.nonzero returns a tuple of arrays; formatting it with '%i'
    # relied on deprecated NumPy behavior. Extract the scalar digit explicitly.
    digit = int(np.nonzero(y[Ind[i], :])[0][0])
    _ = ax[i].set_title('%i' % digit, fontproperties=font, fontsize=16)
    _ = ax[i].set_aspect(1)
# From here on, work with the integer labels instead of the one-hot matrix.
y = Values.copy()
del Values
def Dist_Plot(y, Plot_dict):
    """Show the class distribution of ``y`` as a summary table plus a donut chart.

    Parameters
    ----------
    y : pd.Series or array-like
        Class labels; array-likes are converted to a Series internally.
    Plot_dict : dict
        Plot options: 'column_widths', 'PieColors', 'textfont', 'TableColors'
        (header/cell colors), 'height' (or None), 'legend_title'.
    """
    def ToSeries(x):
        # Accept plain arrays/lists as well as pandas Series.
        if not isinstance(x, pd.Series):
            Out = pd.Series(x)
        else:
            Out = x.copy()
        return Out

    # Normalize once so .unique()/.value_counts() are always available.
    y = ToSeries(y)
    fig = make_subplots(rows=1, cols=2, horizontal_spacing=0.02, column_widths=Plot_dict['column_widths'],
                        specs=[[{"type": "table"}, {"type": "pie"}]])
    # Right: donut chart.
    # BUG FIX: value_counts() orders by frequency while the labels were sorted
    # by class, so labels and values could pair up wrongly. Sorting the counts
    # by index keeps every label aligned with its own count.
    Counts = y.value_counts().sort_index()
    fig.add_trace(go.Pie(labels=Counts.index, values=Counts.values, pull=[0.1],
                         textfont=dict(size=Plot_dict['textfont']),
                         marker=dict(colors=Plot_dict['PieColors'],
                                     line=dict(color='black', width=1))), row=1, col=2)
    fig.update_traces(hole=.5)
    fig.update_layout(legend=dict(orientation="v"), legend_title_text=Plot_dict['legend_title'])
    # Left: summary table with count and percentage per class.
    Table = y.value_counts().to_frame('Count').reset_index(drop=False)
    Table = Table.rename(columns={'index': Plot_dict['legend_title']}).sort_values(by=[Plot_dict['legend_title']])
    Table['Percentage'] = np.round(100 * (Table['Count'] / Table['Count'].sum()), 2)
    T = Table.copy()
    T['Percentage'] = T['Percentage'].map(lambda x: '%% %.2f' % x)
    # go.Table expects one list of values per column.
    Temp = [T.loc[:, c].values for c in T.columns]
    TableColors = Plot_dict['TableColors']
    fig.add_trace(go.Table(header=dict(values=list(Table.columns), line_color='darkslategray',
                                       fill_color=TableColors[0], align=['center', 'center'],
                                       font=dict(color='white', size=12), height=25), columnwidth=[0.2, 0.4, 0.4],
                           cells=dict(values=Temp, line_color='darkslategray',
                                      fill=dict(color=[TableColors[1], TableColors[1]]),
                                      align=['center', 'center'], font_size=12, height=20)), 1, 1)
    fig.update_layout(title={'text': '<b>' + 'Dataset Distribution' + '<b>', 'x': 0.5,
                             'y': 0.90, 'xanchor': 'center', 'yanchor': 'top'})
    if Plot_dict['height'] is not None:
        fig.update_layout(height=Plot_dict['height'])
    fig.show()
# Plot the overall class distribution of the labels.
Plot_dict = {
    'column_widths': [0.3, 0.7],
    'PieColors': None,
    'textfont': 12,
    'TableColors': ['Navy', 'White'],
    'height': 450,
    'legend_title': 'Digits',
}
Dist_Plot(y, Plot_dict)
StratifiedShuffleSplit is a variation of ShuffleSplit which returns stratified splits: each set contains approximately the same percentage of samples of each target class as the complete set.
# Stratified train/test split: both sets keep the per-class proportions.
Labels = np.sort(y.unique())
Test_Size = 0.3
sss = StratifiedShuffleSplit(n_splits=1, test_size=Test_Size, random_state=42)
_ = sss.get_n_splits(X, y)
for train_index, test_index in sss.split(X, y):
    # BUG FIX: sklearn yields *positional* indices, so pandas objects must be
    # indexed with .iloc — the original .loc (and plain []) lookups only work
    # by accident when the index is a default RangeIndex. The y branches were
    # also duplicated verbatim; the Series branch now uses .iloc as intended.
    if isinstance(X, pd.DataFrame):
        X_train, X_test = X.iloc[train_index], X.iloc[test_index]
    else:
        X_train, X_test = X[train_index], X[test_index]
    if isinstance(y, pd.Series):
        y_train, y_test = y.iloc[train_index], y.iloc[test_index]
    else:
        y_train, y_test = y[train_index], y[test_index]
del sss
# Flattening the X sets: (n, h, w) -> (n, h*w) for classical classifiers.
def Flat(Inp): return Inp.reshape(Inp.shape[0], Inp.shape[1] * Inp.shape[2])
X_train, X_test = Flat(X_train), Flat(X_test)
def Train_Test_Dist(X_train, y_train, X_test, y_test, Plot_dict):
    """Show the class distribution of the train and test labels side by side,
    plus a table of the four set shapes.

    Parameters
    ----------
    X_train, X_test : feature arrays (only their shapes are shown).
    y_train, y_test : pd.Series or array-like class labels.
    Plot_dict : dict
        Plot options: 'column_widths', 'PieColors', 'textfont', 'TableColors',
        'height' (or None), 'legend_title'.
    """
    def ToSeries(x):
        # Accept plain arrays/lists as well as pandas Series.
        if not isinstance(x, pd.Series):
            Out = pd.Series(x)
        else:
            Out = x.copy()
        return Out

    fig = make_subplots(rows=1, cols=3, horizontal_spacing=0.02, column_widths=Plot_dict['column_widths'],
                        specs=[[{"type": "table"}, {'type': 'domain'}, {'type': 'domain'}]])
    # Right: one donut per label set (train in col 2, test in col 3).
    # NOTE: the loop variable no longer shadows the module-level ``y``.
    C = 2
    for y_set in [y_train, y_test]:
        # BUG FIX: sort the counts by class so the sorted labels and the
        # values stay aligned (value_counts() orders by frequency).
        Counts = ToSeries(y_set).value_counts().sort_index()
        fig.add_trace(go.Pie(labels=Counts.index, values=Counts.values, pull=[0.1],
                             textfont=dict(size=Plot_dict['textfont']),
                             marker=dict(colors=Plot_dict['PieColors'],
                                         line=dict(color='black', width=1))), row=1, col=C)
        fig.update_traces(hole=.5)
        fig.update_layout(legend=dict(orientation="v"), legend_title_text=Plot_dict['legend_title'])
        C += 1
    # Left: table with the shape of each set.
    Table = pd.DataFrame(data={'Set': ['X_train', 'X_test', 'y_train', 'y_test'],
                               'Shape': [X_train.shape, X_test.shape, y_train.shape, y_test.shape]}).astype(str)
    T = Table.copy()
    Temp = [T.loc[:, c].values for c in T.columns]
    TableColors = Plot_dict['TableColors']
    fig.add_trace(go.Table(header=dict(values=list(Table.columns), line_color='darkslategray',
                                       fill_color=TableColors[0], align=['center', 'center'],
                                       font=dict(color='white', size=12), height=25), columnwidth=[0.2, 0.4],
                           cells=dict(values=Temp, line_color='darkslategray',
                                      fill=dict(color=[TableColors[1], TableColors[1]]),
                                      align=['center', 'center'], font_size=12, height=20)), 1, 1)
    fig.update_layout(title={'text': '<b>' + 'Dataset Distribution' + '<b>', 'x': 0.5,
                             'y': 0.90, 'xanchor': 'center', 'yanchor': 'top'})
    if Plot_dict['height'] is not None:
        fig.update_layout(height=Plot_dict['height'])
    fig.show()
# Visualise the split: shapes table plus per-set class distributions.
Plot_dict = {
    'column_widths': [0.3, 0.3, 0.3],
    'PieColors': None,
    'textfont': 12,
    'TableColors': ['Navy', 'White'],
    'height': 450,
    'legend_title': 'Digits',
}
Train_Test_Dist(X_train, y_train, X_test, y_test, Plot_dict)
def Confusion_Mat(CM_Train, CM_Test, PD, n_splits=10, labels=None):
    """Plot raw and row-normalized confusion matrices for train and test sets.

    Parameters
    ----------
    CM_Train, CM_Test : array-like
        Square confusion matrices for the train and test sets.
    PD : dict
        Plot options: 'FS' (figure size), 'annot_kws' (annotation font size),
        'shrink' (colorbar shrink factor).
    n_splits : int or None
        CV fold count shown in the titles; None means no cross-validation.
    labels : sequence, optional
        Axis tick labels. Defaults to the module-level ``Labels`` for
        backward compatibility with existing callers.
    """
    if labels is None:
        labels = Labels
    if n_splits is None:
        Titles = ['Train Set', 'Test Set']
    else:
        Titles = ['Train Set (CV = % i)' % n_splits, 'Test Set (CV = % i)' % n_splits]
    CM = [CM_Train, CM_Test]
    # Green palettes for the train figure, blue palettes for the test figure.
    Cmap = ['Greens', 'YlGn', 'Blues', 'PuBu']
    for i in range(2):
        fig, ax = plt.subplots(1, 2, figsize=PD['FS'])
        fig.suptitle(Titles[i], weight='bold', fontsize=16)
        _ = sns.heatmap(CM[i], annot=True, annot_kws={"size": PD['annot_kws']}, cmap=Cmap[2 * i], ax=ax[0],
                        linewidths=0.2, cbar_kws={"shrink": PD['shrink']})
        _ = ax[0].set_title('Confusion Matrix')
        # Row-normalize so each cell reads as the fraction of that true class.
        Temp = np.round(CM[i].astype('float') / CM[i].sum(axis=1)[:, np.newaxis], 2)
        _ = sns.heatmap(Temp,
                        annot=True, annot_kws={"size": PD['annot_kws']}, cmap=Cmap[2 * i + 1], ax=ax[1],
                        linewidths=0.4, vmin=0, vmax=1, cbar_kws={"shrink": PD['shrink']})
        _ = ax[1].set_title('Normalized Confusion Matrix')
        for a in ax:
            _ = a.set_xlabel('Predicted labels')
            _ = a.set_ylabel('True labels')
            _ = a.xaxis.set_ticklabels(labels)
            _ = a.yaxis.set_ticklabels(labels)
            _ = a.set_aspect(1)
CatBoost Classifier is based on gradient boosted decision trees. During training, a set of decision trees is built consecutively. Each successive tree is built with reduced loss compared to the previous trees.
# Cross-validated CatBoost run: 5 stratified shuffle splits, 100 iterations.
n_splits = 5
N = int(1e2)  # boosting iterations
model = catboost.CatBoostClassifier(iterations=N,
                                    task_type="GPU",
                                    devices='0:1',
                                    max_ctr_complexity=6,
                                    random_seed=42, od_type='Iter', od_wait=N, verbose=int(N / 10), depth=5)
sss = StratifiedShuffleSplit(n_splits=n_splits, test_size=Test_Size, random_state=42)
# BUG FIX: 'X = X.valuesLabels' was a typo (it would raise AttributeError on
# DataFrame input); the intent is to convert pandas objects to plain ndarrays.
if isinstance(X, pd.DataFrame):
    X = X.values
if isinstance(y, pd.Series):
    y = y.values
_ = sss.get_n_splits(X, y)
# Collect a classification report and a confusion matrix per fold, for both
# the train and the test partitions.
Reports_Train, Reports_Test = [], []
CM_Train, CM_Test = [], []
for idx_tr, idx_te in sss.split(X, y):
    # Select and flatten the image arrays for this fold.
    Xtr, Xte = Flat(X[idx_tr]), Flat(X[idx_te])
    ytr, yte = y[idx_tr], y[idx_te]
    _ = model.fit(Xtr, ytr, eval_set=(Xte, yte), plot=False, verbose=False)
    # Score the fitted model on train first, then test (order matters for the
    # stores below). R keeps the last report as a layout template for later.
    for targs, feats, rep_store, cm_store in ((ytr, Xtr, Reports_Train, CM_Train),
                                              (yte, Xte, Reports_Test, CM_Test)):
        preds = model.predict(feats)
        R = pd.DataFrame(metrics.classification_report(targs, preds, target_names=Labels, output_dict=True)).T
        rep_store.append(R.values)
        cm_store.append(metrics.confusion_matrix(targs, preds))
def _CV_Summary(report_vals, cms, template):
    """Aggregate per-fold classification reports and confusion matrices.

    Parameters
    ----------
    report_vals : list of 2-D arrays (one classification report per fold).
    cms : list of 2-D arrays (one confusion matrix per fold).
    template : pd.DataFrame supplying the report's index/columns/shape.

    Returns
    -------
    (report, cm) : report is a DataFrame of 'mean ± std' strings with the
    template's layout; cm is the fold-averaged confusion matrix rounded to
    integer counts.
    """
    stacked = np.vstack([r.ravel() for r in report_vals])
    Mean = pd.DataFrame(stacked.mean(axis=0).reshape(template.shape),
                        index=template.index, columns=template.columns)
    STD = pd.DataFrame(stacked.std(axis=0).reshape(template.shape),
                       index=template.index, columns=template.columns)
    report = Mean.applymap(lambda x: ('%.4f' % x)) + ' ± ' + STD.applymap(lambda x: ('%.4f' % x))
    cm = np.vstack([c.ravel() for c in cms]).mean(axis=0).reshape(cms[0].shape).round(0).astype(int)
    return report, cm

# The train and test aggregations were duplicated verbatim in the original;
# both now go through the single helper above. R (the last fold's report)
# supplies the row/column layout.
Reports_Train, CM_Train = _CV_Summary(Reports_Train, CM_Train, R)
Reports_Test, CM_Test = _CV_Summary(Reports_Test, CM_Test, R)
Reports_Train = Reports_Train.reset_index().rename(columns={'index': 'Train Set (CV = % i)' % n_splits})
Reports_Test = Reports_Test.reset_index().rename(columns={'index': 'Test Set (CV = % i)' % n_splits})
display(Reports_Train.style.hide_index().set_properties(**{'background-color': 'HoneyDew', 'color': 'Black'}).\
set_properties(subset=['Train Set (CV = % i)' % n_splits], **{'background-color': 'SeaGreen', 'color': 'White'}))
display(Reports_Test.style.hide_index().set_properties(**{'background-color': 'Azure', 'color': 'Black'}).\
set_properties(subset=['Test Set (CV = % i)' % n_splits], **{'background-color': 'RoyalBlue', 'color': 'White'}))
PD = dict(FS=(20, 8), annot_kws=10, shrink=.6)
Confusion_Mat(CM_Train, CM_Test, PD=PD, n_splits=n_splits)
| Train Set (CV = 5) | precision | recall | f1-score | support |
|---|---|---|---|---|
| 0 | 0.9779 ± 0.0078 | 0.9846 ± 0.0103 | 0.9812 ± 0.0048 | 143.0000 ± 0.0000 |
| 1 | 0.9972 ± 0.0034 | 0.9944 ± 0.0052 | 0.9958 ± 0.0034 | 143.0000 ± 0.0000 |
| 2 | 0.9646 ± 0.0132 | 0.9764 ± 0.0150 | 0.9703 ± 0.0078 | 144.0000 ± 0.0000 |
| 3 | 0.9753 ± 0.0092 | 0.9766 ± 0.0094 | 0.9759 ± 0.0069 | 145.0000 ± 0.0000 |
| 4 | 0.9767 ± 0.0034 | 0.9903 ± 0.0056 | 0.9834 ± 0.0041 | 144.0000 ± 0.0000 |
| 5 | 0.9782 ± 0.0078 | 0.9822 ± 0.0070 | 0.9802 ± 0.0054 | 146.0000 ± 0.0000 |
| 6 | 0.9834 ± 0.0127 | 0.9766 ± 0.0112 | 0.9799 ± 0.0106 | 145.0000 ± 0.0000 |
| 7 | 0.9902 ± 0.0034 | 0.9847 ± 0.0028 | 0.9875 ± 0.0017 | 144.0000 ± 0.0000 |
| 8 | 0.9804 ± 0.0051 | 0.9708 ± 0.0028 | 0.9756 ± 0.0022 | 144.0000 ± 0.0000 |
| 9 | 0.9986 ± 0.0028 | 0.9848 ± 0.0091 | 0.9916 ± 0.0052 | 145.0000 ± 0.0000 |
| accuracy | 0.9821 ± 0.0036 | 0.9821 ± 0.0036 | 0.9821 ± 0.0036 | 0.9821 ± 0.0036 |
| macro avg | 0.9823 ± 0.0036 | 0.9821 ± 0.0036 | 0.9821 ± 0.0036 | 1443.0000 ± 0.0000 |
| weighted avg | 0.9823 ± 0.0036 | 0.9821 ± 0.0036 | 0.9821 ± 0.0036 | 1443.0000 ± 0.0000 |
| Test Set (CV = 5) | precision | recall | f1-score | support |
|---|---|---|---|---|
| 0 | 0.7993 ± 0.0292 | 0.8426 ± 0.0222 | 0.8199 ± 0.0181 | 61.0000 ± 0.0000 |
| 1 | 0.8661 ± 0.0530 | 0.8677 ± 0.0449 | 0.8653 ± 0.0325 | 62.0000 ± 0.0000 |
| 2 | 0.7582 ± 0.0206 | 0.7355 ± 0.0079 | 0.7464 ± 0.0076 | 62.0000 ± 0.0000 |
| 3 | 0.7224 ± 0.0220 | 0.7774 ± 0.0572 | 0.7476 ± 0.0291 | 62.0000 ± 0.0000 |
| 4 | 0.8180 ± 0.0364 | 0.8774 ± 0.0347 | 0.8461 ± 0.0282 | 62.0000 ± 0.0000 |
| 5 | 0.8344 ± 0.0683 | 0.7516 ± 0.0316 | 0.7880 ± 0.0196 | 62.0000 ± 0.0000 |
| 6 | 0.7206 ± 0.0737 | 0.7290 ± 0.0504 | 0.7233 ± 0.0547 | 62.0000 ± 0.0000 |
| 7 | 0.9073 ± 0.0315 | 0.8065 ± 0.0289 | 0.8532 ± 0.0165 | 62.0000 ± 0.0000 |
| 8 | 0.7195 ± 0.0341 | 0.7290 ± 0.0615 | 0.7234 ± 0.0444 | 62.0000 ± 0.0000 |
| 9 | 0.9051 ± 0.0260 | 0.8903 ± 0.0413 | 0.8973 ± 0.0290 | 62.0000 ± 0.0000 |
| accuracy | 0.8006 ± 0.0083 | 0.8006 ± 0.0083 | 0.8006 ± 0.0083 | 0.8006 ± 0.0083 |
| macro avg | 0.8051 ± 0.0074 | 0.8007 ± 0.0084 | 0.8011 ± 0.0076 | 619.0000 ± 0.0000 |
| weighted avg | 0.8051 ± 0.0074 | 0.8006 ± 0.0083 | 0.8010 ± 0.0076 | 619.0000 ± 0.0000 |
# Single hold-out split for the final (longer) training run.
Test_Size = 0.3
sss = StratifiedShuffleSplit(n_splits=1, test_size=Test_Size, random_state=42)
_ = sss.get_n_splits(X, y)
for train_index, test_index in sss.split(X, y):
    # BUG FIX: sklearn yields *positional* indices, so pandas objects must be
    # indexed with .iloc — .loc (and plain []) only work by accident for a
    # default RangeIndex. The duplicated identical y branches were also
    # corrected so the Series branch uses .iloc.
    if isinstance(X, pd.DataFrame):
        X_train, X_test = X.iloc[train_index], X.iloc[test_index]
    else:
        X_train, X_test = X[train_index], X[test_index]
    if isinstance(y, pd.Series):
        y_train, y_test = y.iloc[train_index], y.iloc[test_index]
    else:
        y_train, y_test = y[train_index], y[test_index]
del sss
# Flattening the X sets: (n, h, w) -> (n, h*w).
def Flat(Inp): return Inp.reshape(Inp.shape[0], Inp.shape[1] * Inp.shape[2])
X_train, X_test = Flat(X_train), Flat(X_test)
# Final model: same hyper-parameters as the CV runs but 10x more iterations.
N = int(1e3)
cat_params = dict(iterations=N,
                  task_type="GPU",
                  devices='0:1',
                  max_ctr_complexity=6,
                  random_seed=42,
                  od_type='Iter',
                  od_wait=N,
                  verbose=int(N / 20),
                  depth=5)
model = catboost.CatBoostClassifier(**cat_params)
_ = model.fit(X_train, y_train, eval_set=(X_test, y_test), plot=True)
Learning rate set to 0.109123 0: learn: 2.1523302 test: 2.1621319 best: 2.1621319 (0) total: 76.7ms remaining: 1m 16s 50: learn: 0.7059037 test: 0.9150597 best: 0.9150597 (50) total: 3.16s remaining: 58.9s 100: learn: 0.5120216 test: 0.8046392 best: 0.8046392 (100) total: 5.88s remaining: 52.4s 150: learn: 0.4222760 test: 0.7544001 best: 0.7544001 (150) total: 8.43s remaining: 47.4s 200: learn: 0.3601091 test: 0.7236471 best: 0.7236471 (200) total: 11s remaining: 43.6s 250: learn: 0.3080804 test: 0.6944863 best: 0.6944863 (250) total: 13.5s remaining: 40.4s 300: learn: 0.2717313 test: 0.6777855 best: 0.6777855 (300) total: 16.1s remaining: 37.4s 350: learn: 0.2434064 test: 0.6623253 best: 0.6623253 (350) total: 18.6s remaining: 34.4s 400: learn: 0.2188545 test: 0.6482199 best: 0.6482199 (400) total: 21.1s remaining: 31.6s 450: learn: 0.1933839 test: 0.6364678 best: 0.6364678 (450) total: 23.7s remaining: 28.9s 500: learn: 0.1745160 test: 0.6256329 best: 0.6256329 (500) total: 26.3s remaining: 26.2s 550: learn: 0.1568925 test: 0.6182095 best: 0.6182095 (550) total: 28.8s remaining: 23.5s 600: learn: 0.1430282 test: 0.6107726 best: 0.6106296 (599) total: 31.4s remaining: 20.8s 650: learn: 0.1301996 test: 0.6052072 best: 0.6051503 (649) total: 33.9s remaining: 18.2s 700: learn: 0.1195716 test: 0.5981281 best: 0.5981281 (700) total: 36.5s remaining: 15.6s 750: learn: 0.1090064 test: 0.5914645 best: 0.5914560 (749) total: 39.1s remaining: 13s 800: learn: 0.1006311 test: 0.5876982 best: 0.5875942 (798) total: 41.7s remaining: 10.4s 850: learn: 0.0932891 test: 0.5825925 best: 0.5825925 (850) total: 44.2s remaining: 7.75s 900: learn: 0.0859777 test: 0.5779413 best: 0.5777730 (895) total: 46.8s remaining: 5.15s 950: learn: 0.0794513 test: 0.5727897 best: 0.5721051 (946) total: 49.4s remaining: 2.55s 999: learn: 0.0736817 test: 0.5702936 best: 0.5702936 (999) total: 52s remaining: 0us bestTest = 0.5702935683 bestIteration = 999
# Evaluate the final model on both partitions: classification report plus
# confusion matrix, train first and then test.
Scores = {}
for tag, feats, targs in (('Train Set', X_train, y_train), ('Test Set', X_test, y_test)):
    preds = model.predict(feats)
    rep = pd.DataFrame(metrics.classification_report(targs, preds, target_names=Labels, output_dict=True)).T
    Scores[tag] = (rep.reset_index().rename(columns={'index': tag}),
                   metrics.confusion_matrix(targs, preds))
Reports_Train, CM_Train = Scores['Train Set']
Reports_Test, CM_Test = Scores['Test Set']
display(Reports_Train.style.hide_index().set_properties(**{'background-color': 'HoneyDew', 'color': 'Black'}).\
set_properties(subset=['Train Set'], **{'background-color': 'SeaGreen', 'color': 'White'}))
display(Reports_Test.style.hide_index().set_properties(**{'background-color': 'Azure', 'color': 'Black'}).\
set_properties(subset=['Test Set'], **{'background-color': 'RoyalBlue', 'color': 'White'}))
PD = dict(FS=(20, 8), annot_kws=10, shrink=.6)
Confusion_Mat(CM_Train, CM_Test, PD=PD, n_splits=None)
| Train Set | precision | recall | f1-score | support |
|---|---|---|---|---|
| 0 | 1.000000 | 1.000000 | 1.000000 | 143.000000 |
| 1 | 1.000000 | 1.000000 | 1.000000 | 143.000000 |
| 2 | 1.000000 | 1.000000 | 1.000000 | 144.000000 |
| 3 | 1.000000 | 1.000000 | 1.000000 | 145.000000 |
| 4 | 1.000000 | 1.000000 | 1.000000 | 144.000000 |
| 5 | 1.000000 | 1.000000 | 1.000000 | 146.000000 |
| 6 | 1.000000 | 1.000000 | 1.000000 | 145.000000 |
| 7 | 1.000000 | 1.000000 | 1.000000 | 144.000000 |
| 8 | 1.000000 | 1.000000 | 1.000000 | 144.000000 |
| 9 | 1.000000 | 1.000000 | 1.000000 | 145.000000 |
| accuracy | 1.000000 | 1.000000 | 1.000000 | 1.000000 |
| macro avg | 1.000000 | 1.000000 | 1.000000 | 1443.000000 |
| weighted avg | 1.000000 | 1.000000 | 1.000000 | 1443.000000 |
| Test Set | precision | recall | f1-score | support |
|---|---|---|---|---|
| 0 | 0.833333 | 0.901639 | 0.866142 | 61.000000 |
| 1 | 0.982456 | 0.903226 | 0.941176 | 62.000000 |
| 2 | 0.750000 | 0.774194 | 0.761905 | 62.000000 |
| 3 | 0.670886 | 0.854839 | 0.751773 | 62.000000 |
| 4 | 0.892308 | 0.935484 | 0.913386 | 62.000000 |
| 5 | 0.867925 | 0.741935 | 0.800000 | 62.000000 |
| 6 | 0.844828 | 0.790323 | 0.816667 | 62.000000 |
| 7 | 0.962264 | 0.822581 | 0.886957 | 62.000000 |
| 8 | 0.633333 | 0.612903 | 0.622951 | 62.000000 |
| 9 | 0.921875 | 0.951613 | 0.936508 | 62.000000 |
| accuracy | 0.828756 | 0.828756 | 0.828756 | 0.828756 |
| macro avg | 0.835921 | 0.828874 | 0.829746 | 619.000000 |
| weighted avg | 0.835925 | 0.828756 | 0.829688 | 619.000000 |
The best result for each metric is calculated on each validation dataset.
# BUG FIX: CatBoostClassifier.score returns the mean accuracy (this is a
# classifier, not a regressor), so the row was mislabeled 'R2 Score'.
display(pd.DataFrame({'Train Set': {'Accuracy': model.score(X_train, y_train)},
                      'Validation Set': {'Accuracy': model.score(X_test, y_test)}}))
| Train Set | Validation Set | |
|---|---|---|
| R2 Score | 1.0 | 0.828756 |